查看原文
其他

R语言 | ggpubr包让数据可视化更加优雅

大邓 大邓和他的Python
2024-09-09

Hadley Wickham的ggplot2 是一个出色且灵活的包,用于在 R 中进行优雅的数据可视化。但是,默认生成的图需要经过一些格式调整才能用于发表。此外,要自定义 ggplot,语法是复杂的,这提高了没有高级 R 编程技能的研究人员的使用难度。

ggpubr包 提供了一些易于使用的功能,可以使用更简单的语法代码绘制出可供发表出版的图表。


安装

install.packages("ggpubr")

折线图

library(ggpubr)

# Small example data frame with a grouping column ("supp"), a categorical
# x-axis column ("dose") and a numeric y-axis column ("len").
df <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
# print(df)
#>   supp dose  len
#> 1   VC D0.5  6.8
#> 2   VC   D1 15.0
#> 3   VC   D2 33.0
#> 4   OJ D0.5  4.2
#> 5   OJ   D1 10.0
#> 6   OJ   D2 29.5

# Line plot of "len" against "dose". The second grouping variable "supp"
# drives both the line type and the point shape.
ggline(
  df,
  x = "dose", y = "len",
  linetype = "supp", shape = "supp"
)

# Same plot, additionally colored by "supp" using a custom two-color palette.
ggline(
  df,
  x = "dose", y = "len",
  linetype = "supp", shape = "supp",
  color = "supp", palette = c("#00AFBB", "#E7B800")
)

散点图

# Work on a copy of the built-in mtcars data set, with the cylinder count
# converted to a factor so it can serve as the color grouping variable.
data("mtcars")
df <- mtcars
df[["cyl"]] <- as.factor(df[["cyl"]])
# head(df[, c("wt", "mpg", "cyl")], 3)
#>                  wt  mpg cyl
#> Mazda RX4     2.620 21.0   6
#> Mazda RX4 Wag 2.875 21.0   6
#> Datsun 710    2.320 22.8   4

# Scatter plot with text annotations: copy the row names into a regular
# column so they can be passed as point labels.
df[["name"]] <- rownames(df)
ggscatter(
  df,
  x = "wt", y = "mpg",
  color = "cyl",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  label = "name",
  repel = TRUE
)


饼形图

# Example data: one row per slice, with the slice name in "group" and its
# size in "value".
df <- data.frame(
  group = c("Male", "Female", "Child"),
  value = c(25, 25, 50)
)
# head(df)
#>    group value
#> 1   Male    25
#> 2 Female    25
#> 3  Child    50

# Basic pie chart, slices labelled with the "group" column.
ggpie(df, x = "value", label = "group")

# Fill each slice according to its group.
ggpie(df, x = "value", label = "group", fill = "group")

# Custom fill palette and white slice borders.
ggpie(
  df,
  x = "value", label = "group", fill = "group",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  color = "white"
)

# Build "<group> (<value>%)" labels once; they are reused by the remaining
# pie charts (and by any later code that reads `labs`).
labs <- paste0(df$group, " (", df$value, "%)")
#> "Male (25%)" "Female (25%)" "Child (50%)"

# Use the custom labels instead of a column name.
ggpie(
  df,
  x = "value", label = labs, fill = "group",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  color = "white"
)

# Same chart with the labels drawn inside the slices in white.
ggpie(
  df,
  x = "value", label = labs, fill = "group",
  lab.pos = "in", lab.font = "white",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  color = "white"
)

甜甜圈图

# This section reuses `df` and `labs` as they were defined for the pie charts.
# head(df)
#>    group value
#> 1   Male    25
#> 2 Female    25
#> 3  Child    50

# Donut chart with the labels placed inside the ring and drawn in white.
ggdonutchart(
  df,
  x = "value",
  label = labs,
  lab.pos = "in",
  lab.font = "white",
  fill = "group",
  color = "white",
  palette = c("#00AFBB", "#E7B800", "#FC4E07")
)


点图

# Load the built-in mtcars data set and work on a copy.
data("mtcars")
dfm <- mtcars
# Convert the cyl variable to a factor
dfm$cyl <- as.factor(dfm$cyl)
# Add the name column (car names are stored as row names)
dfm$name <- rownames(dfm)
# Inspect the data
# head(dfm[, c("name", "wt", "mpg", "cyl")])
#>                                name    wt  mpg cyl
#> Mazda RX4                 Mazda RX4 2.620 21.0   6
#> Mazda RX4 Wag         Mazda RX4 Wag 2.875 21.0   6
#> Datsun 710               Datsun 710 2.320 22.8   4
#> Hornet 4 Drive       Hornet 4 Drive 3.215 21.4   6
#> Hornet Sportabout Hornet Sportabout 3.440 18.7   8
#> Valiant                     Valiant 3.460 18.1   6

# Dot chart of mpg per car.
ggdotchart(
  dfm,
  x = "name", y = "mpg",
  color = "cyl",                                # Color by groups
  palette = c("#00AFBB", "#E7B800", "#FC4E07"), # Custom color palette
  sorting = "ascending",                        # Sort values in ascending order
  add = "segments",                             # Add segments from y = 0 to dots
  ggtheme = theme_pubr()                        # ggplot2 theme
)

密度图

# Simulated data: 200 "F" and 200 "M" observations whose weights are drawn
# from normal distributions with means 55 and 58 (default sd). The seed is
# fixed so the draws are reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# The calls below build the final plot one option at a time.

# Density curves with the outline colored by group ("sex").
ggdensity(wdata, x = "weight", color = "sex")

# Add mean lines.
ggdensity(wdata, x = "weight", color = "sex", add = "mean")

# Add a marginal rug.
ggdensity(wdata, x = "weight", color = "sex", add = "mean", rug = TRUE)

# Fill the area under each curve by group as well.
ggdensity(
  wdata,
  x = "weight", color = "sex", add = "mean", rug = TRUE, fill = "sex"
)

# Use a custom color palette.
ggdensity(
  wdata,
  x = "weight", color = "sex", add = "mean", rug = TRUE, fill = "sex",
  palette = c("#00AFBB", "#E7B800")
)

直方图

# Histogram of "weight" from `wdata` (the data frame built in the density
# section): outline and fill are colored by "sex" with a custom palette, and
# mean lines plus a marginal rug are added.
gghistogram(
  wdata,
  x = "weight",
  add = "mean",
  rug = TRUE,
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800")
)


箱图

# Use the built-in ToothGrowth data set.
data("ToothGrowth")
df <- ToothGrowth
# head(df, 4)
#>    len supp dose
#> 1  4.2   VC  0.5
#> 2 11.5   VC  0.5
#> 3  7.3   VC  0.5
#> 4  5.8   VC  0.5

# Box plot of "len" per "dose" with jittered points overlaid. The outline
# color and the point shape both vary by "dose", using a custom palette.
p <- ggboxplot(
  df,
  x = "dose", y = "len",
  add = "jitter",
  color = "dose", shape = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07")
)
# Display the stored plot.
p


条形图

# Load the built-in mtcars data set and work on a copy.
data("mtcars")
dfm <- mtcars
# Convert the cyl variable to a factor
dfm$cyl <- as.factor(dfm$cyl)
# Add the name column (car names are stored as row names)
dfm$name <- rownames(dfm)
# Inspect the data
# head(dfm[, c("name", "wt", "mpg", "cyl")])
#>                                name    wt  mpg cyl
#> Mazda RX4                 Mazda RX4 2.620 21.0   6
#> Mazda RX4 Wag         Mazda RX4 Wag 2.875 21.0   6
#> Datsun 710               Datsun 710 2.320 22.8   4
#> Hornet 4 Drive       Hornet 4 Drive 3.215 21.4   6
#> Hornet Sportabout Hornet Sportabout 3.440 18.7   8
#> Valiant                     Valiant 3.460 18.1   6

# Bar plot of mpg per car, sorted in descending order inside each cyl group.
ggbarplot(
  dfm,
  x = "name", y = "mpg",
  fill = "cyl",            # Change fill color by cyl
  color = "white",         # Set bar border colors to white
  palette = "jco",         # jco journal color palette; see ?ggpar
  sort.val = "desc",       # Sort the values in descending order
  sort.by.groups = TRUE,   # Sort inside each group
  x.text.angle = 90        # Rotate x axis texts vertically
)

# Same plot with a custom legend title.
ggbarplot(
  dfm,
  x = "name", y = "mpg",
  fill = "cyl",            # Change fill color by cyl
  color = "white",         # Set bar border colors to white
  palette = "jco",         # jco journal color palette; see ?ggpar
  sort.val = "desc",       # Sort the values in descending order
  sort.by.groups = TRUE,   # Sort inside each group
  x.text.angle = 90,       # Rotate x axis texts vertically
  legend.title = "CYL"     # Set legend title
)

表格

# Text table of the first rows of iris, drawn with the medium blue ("mBlue")
# theme and without row names.
ggtexttable(head(iris), rows = NULL, theme = ttheme("mBlue"))

main.title <- "Edgar Anderson's Iris Data"

# Assemble the long description, wrap it at 80 characters, then join the
# wrapped lines with newlines so it can be used as a multi-line subtitle.
subtitle <- paste(
  strwrap(
    paste0(
      "This famous (Fisher's or Anderson's) iris data set gives the measurements",
      " in centimeters of the variables sepal length and width and petal length and width,",
      " respectively, for 50 flowers from each of 3 species of iris.",
      " The species are Iris setosa, versicolor, and virginica."
    ),
    width = 80
  ),
  collapse = "\n"
)

# Base table using the "light" theme.
tab <- ggtexttable(head(iris), theme = ttheme("light"))

# Decorate the table: the subtitle is added first and the main title second,
# followed by an italic footnote.
tab %>%
  tab_add_title(text = subtitle, face = "plain", size = 10) %>%
  tab_add_title(
    text = main.title,
    face = "bold",
    padding = unit(0.1, "line")
  ) %>%
  tab_add_footnote(
    text = "*Table created using ggpubr",
    size = 10,
    face = "italic"
  )


精选内容

管理世界 | 使用文本分析词构建并测量短视主义

管理世界 | 使用 经营讨论与分析 测量 企业数字化指标

管理世界 | 用正则表达式、文本向量化、线性回归算法从md&a数据中计算 「企业融资约束指标」

管理世界 | 政府与市场心理因素的经济影响及其测度

叙事经济学:揭示经济中的叙事

中文心理词典,含具体性、可成象性等指标

PNAS | 14000+篇心理学顶刊论文可复现性调研(含代码)

可视化 | 词嵌入模型用于计算社科领域刻板印象等信息(含代码)

可视化 | 绘制《三体》人物关系网络图

可视化 | 99-21年地方政府报告关键词变化趋势

可视化 | 文本数据分成n等份、词云图、情绪变化趋势、相似度变化趋势

文本分析 | 中国企业高管团队创新注意力(含代码)

文本分析 | MD&A 信息含量指标构建代码实现

金融研究 | 使用Python构建「关键审计事项信息含量」

转载 | 大数据驱动的「社会经济地位」分析研究综述

使用 Word2Vec 和 TF-IDF 计算五类企业文化

如何用「图嵌入」将企业、高管职业经历表征为向量数据

Nature | 通用中英文六维语义情感词典

采购合同数据集 | 政府采购何以牵动企业创新

96G数据集 | 2亿条中国大陆企业工商注册信息

70G数据集 | 3571万条专利申请数据集(1985-2022年)

数据集 | 3.9G全国POI地点兴趣点数据集

数据集 | 「问询函」

93G数据集 | 中国裁判文书网(2010~2021)

网络爬虫 | 使用Python采集B站弹幕和评论数据

数据集 | 07-21年上市公司「委托贷款公告」

单个csv文件体积大于电脑内存,怎么办?

网络爬虫 | 使用Python披露采集 Up 主视频详情信息

继续滑动看下一个
大邓和他的Python
向上滑动看下一个

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存